#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
@author: xaoyaoyao
@contact: xaoyaoyao@aliyun.com
@file: js_spider.py
@time: 2018/08/28
"""
from selenium import webdriver
from scrapy.spiders import CrawlSpider
from scrapy import signals
from article_spider.settings import CHROME_DRIVER_FILE
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup


class JSSpider(CrawlSpider):
    """Base crawl spider that owns a headless Chrome browser.

    A Selenium Chrome driver is created when the spider is instantiated and
    stored on ``self.browser``. The driver is shut down from the
    ``spider_closed`` signal handler that ``from_crawler`` registers.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the Scrapy spider, then start the Chrome driver.

        Positional and keyword arguments are forwarded unchanged to the
        ``CrawlSpider`` initializer. ``from_crawler`` passes spider arguments
        through to the constructor, so they must be accepted here.
        """
        # Run the base initializer first: if it raises, no browser process
        # has been started yet and nothing needs to be cleaned up.
        super(JSSpider, self).__init__(*args, **kwargs)

        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        # NOTE(review): `chrome_options=` and `executable_path=` are the
        # Selenium 3 keyword names; confirm against the installed Selenium
        # version before upgrading.
        self.browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=CHROME_DRIVER_FILE)
        self.browser.set_page_load_timeout(45)
        self.browser.maximize_window()

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider and hook ``spider_closed`` to the crawler signal.

        Returns the spider instance created by the parent ``from_crawler``.
        """
        spider = super(JSSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider):
        """Log the shutdown and quit the browser owned by this spider.

        ``spider`` is the argument delivered with the signal; it is not used
        here because the handler is bound to this instance.
        """
        # NOTE(review): logged at error level although this looks like a
        # normal shutdown path - confirm whether that level is intentional.
        self.logger.error(self.name + ' spider closed')
        # Close the browser when the spider exits.
        self.browser.quit()
